# Importing libraries
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
import seaborn as sns
# Importing data
# Read the philosophy corpus from disk and preview its first rows.
csv_path = '/Users/rheasablani/Desktop/philosophy_data.csv'
data = pd.read_csv(csv_path)
data.head()
| | title | author | school | sentence_spacy | sentence_str | original_publication_date | corpus_edition_date | sentence_length | sentence_lowered | tokenized_txt | lemmatized_str |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Plato - Complete Works | Plato | plato | What's new, Socrates, to make you leave your ... | What's new, Socrates, to make you leave your ... | -350 | 1997 | 125 | what's new, socrates, to make you leave your ... | ['what', 'new', 'socrates', 'to', 'make', 'you... | what be new , Socrates , to make -PRON- lea... |
| 1 | Plato - Complete Works | Plato | plato | Surely you are not prosecuting anyone before t... | Surely you are not prosecuting anyone before t... | -350 | 1997 | 69 | surely you are not prosecuting anyone before t... | ['surely', 'you', 'are', 'not', 'prosecuting',... | surely -PRON- be not prosecute anyone before ... |
| 2 | Plato - Complete Works | Plato | plato | The Athenians do not call this a prosecution b... | The Athenians do not call this a prosecution b... | -350 | 1997 | 74 | the athenians do not call this a prosecution b... | ['the', 'athenians', 'do', 'not', 'call', 'thi... | the Athenians do not call this a prosecution ... |
| 3 | Plato - Complete Works | Plato | plato | What is this you say? | What is this you say? | -350 | 1997 | 21 | what is this you say? | ['what', 'is', 'this', 'you', 'say'] | what be this -PRON- say ? |
| 4 | Plato - Complete Works | Plato | plato | Someone must have indicted you, for you are no... | Someone must have indicted you, for you are no... | -350 | 1997 | 101 | someone must have indicted you, for you are no... | ['someone', 'must', 'have', 'indicted', 'you',... | someone must have indict -PRON- , for -PRON- ... |
# List the distinct values of the 'school' column, in order of first appearance.
print(data.school.unique())
['plato' 'aristotle' 'empiricism' 'rationalism' 'analytic' 'continental' 'phenomenology' 'german_idealism' 'communism' 'capitalism' 'stoicism' 'nietzsche' 'feminism']
def _authors_of(school, label):
    """Select one school's rows, print its distinct authors, and return both.

    `school` is the value matched against the 'school' column; `label` is the
    prefix printed in front of the author array.
    """
    rows = data.loc[data['school'] == school]
    authors = rows['author'].unique()
    print(label, authors)
    return rows, authors


# One (subset, authors) pair per school, printed in the same order as before.
plato, plato_author = _authors_of('plato', 'Plato:')
aristotle, aristotle_author = _authors_of('aristotle', 'Aristotle:')
empiricism, empiricism_author = _authors_of('empiricism', 'Empiricism:')
rationalism, rationalism_author = _authors_of('rationalism', 'Rationalism:')
analytic, analytic_author = _authors_of('analytic', 'Analytic:')
continental, continental_author = _authors_of('continental', 'Continental:')
phenomenology, phenomenology_author = _authors_of('phenomenology', 'Phenomenology:')
german_idealism, german_idealism_author = _authors_of('german_idealism', 'German idealism:')
communism, communism_author = _authors_of('communism', 'Communism:')
capitalism, capitalism_author = _authors_of('capitalism', 'Capitalism:')
stoicism, stoicism_author = _authors_of('stoicism', 'Stoicism:')
nietzsche, nietzsche_author = _authors_of('nietzsche', 'Nietzsche:')
feminism, feminism_author = _authors_of('feminism', 'Feminism:')
Plato: ['Plato'] Aristotle: ['Aristotle'] Empiricism: ['Locke' 'Hume' 'Berkeley'] Rationalism: ['Spinoza' 'Leibniz' 'Descartes' 'Malebranche'] Analytic: ['Russell' 'Moore' 'Wittgenstein' 'Lewis' 'Quine' 'Popper' 'Kripke'] Continental: ['Foucault' 'Derrida' 'Deleuze'] Phenomenology: ['Merleau-Ponty' 'Husserl' 'Heidegger'] German idealism: ['Kant' 'Fichte' 'Hegel'] Communism: ['Marx' 'Lenin'] Capitalism: ['Smith' 'Ricardo' 'Keynes'] Stoicism: ['Epictetus' 'Marcus Aurelius'] Nietzsche: ['Nietzsche'] Feminism: ['Wollstonecraft' 'Beauvoir' 'Davis']
# Number of distinct titles per school. nunique() takes no column argument
# (its only parameter is the dropna flag), so the stray 'title' is removed;
# the [['title']] selection already restricts the count to that column.
data.groupby(['school'])[['title']].nunique()
| | title |
|---|---|
| school | |
| analytic | 11 |
| aristotle | 1 |
| capitalism | 3 |
| communism | 3 |
| continental | 6 |
| empiricism | 6 |
| feminism | 3 |
| german_idealism | 7 |
| nietzsche | 5 |
| phenomenology | 5 |
| plato | 1 |
| rationalism | 6 |
| stoicism | 2 |
# Pie chart: share of distinct titles contributed by each school.
# Slice sizes are computed from the data and looked up by school name, so
# they cannot drift out of step with the labels the way a hand-typed list of
# counts can if the dataset changes.
schools = data['school'].unique()
titles_per_school = data.groupby('school')['title'].nunique()
data2 = [titles_per_school[school] for school in schools]
fig = plt.figure(figsize=(10, 8))
plt.pie(data2, labels=schools, autopct='%1.1f%%')
plt.show()
# Earliest original publication date recorded for each school.
publication_dates = data.groupby('school')[['original_publication_date']]
earliest = publication_dates.min()
earliest
| | original_publication_date |
|---|---|
| school | |
| analytic | 1910 |
| aristotle | -320 |
| capitalism | 1776 |
| communism | 1848 |
| continental | 1961 |
| empiricism | 1689 |
| feminism | 1792 |
| german_idealism | 1781 |
| nietzsche | 1886 |
| phenomenology | 1907 |
| plato | -350 |
| rationalism | 1637 |
| stoicism | 125 |
# Span between the earliest and latest original publication date per school.
dates_by_school = data.groupby('school')[['original_publication_date']]
schools_of_thought_date1 = dates_by_school.min()
schools_of_thought_date2 = dates_by_school.max()
diff = schools_of_thought_date2.sub(schools_of_thought_date1)
diff
| | original_publication_date |
|---|---|
| school | |
| analytic | 75 |
| aristotle | 0 |
| capitalism | 160 |
| communism | 35 |
| continental | 11 |
| empiricism | 90 |
| feminism | 189 |
| german_idealism | 39 |
| nietzsche | 2 |
| phenomenology | 43 |
| plato | 0 |
| rationalism | 73 |
| stoicism | 45 |
# Plot how many years separate each school's earliest and latest publication.
# Axis labels are set on the returned Axes rather than through plot()'s
# xlabel/ylabel keywords: the recorded output of the original call shows
# 'School of thought' ending up on the y-axis and the other label missing.
# The value axis is also relabelled, since `diff` holds year spans, not dates.
ax = diff.plot(kind='barh', title='Duration length for each School of Thought',
               figsize=(10, 4))
ax.set_xlabel('Years between earliest and latest publication')
ax.set_ylabel('School of thought')
<AxesSubplot:title={'center':'Duration length for each School of Thought'}, ylabel='School of thought'>
# Mean value of the sentence_length column for each school.
sen_len = data['sentence_length'].groupby(data['school']).mean()
sen_len
school analytic 119.025205 aristotle 153.224953 capitalism 187.576289 communism 152.752311 continental 171.792060 empiricism 183.638051 feminism 153.083928 german_idealism 180.251329 nietzsche 116.599867 phenomenology 145.913345 plato 114.938018 rationalism 163.958996 stoicism 137.056410 Name: sentence_length, dtype: float64
# Vertical bar chart of the per-school mean sentence length.
sen_len.plot.bar(
    title='Average Sentence Length for each School of Thought',
    xlabel='School of thought',
    ylabel='Sentence length',
    figsize=(10, 4),
)
<AxesSubplot:title={'center':'Average Sentence Length for each School of Thought'}, xlabel='School of thought', ylabel='Sentence length'>
# Word clouds per school, built from the lower-cased sentence_str text.
# A single loop replaces thirteen copy-pasted stanzas so every school is
# rendered with identical WordCloud settings. collocation_threshold is not
# passed: it only tunes bigram detection, which collocations=False disables.
# (school value in the data, panel title) in display order.
cloud_panels = [
    ('stoicism', 'Stoicism'),
    ('nietzsche', 'Nietzsche'),
    ('communism', 'Communism'),
    ('capitalism', 'Capitalism'),
    ('feminism', 'Feminism'),
    ('empiricism', 'Empiricism'),
    ('rationalism', 'Rationalism'),
    ('phenomenology', 'Phenomenology'),
    ('continental', 'Continental'),
    ('plato', 'Plato'),
    ('german_idealism', 'German idealism'),
    ('aristotle', 'Aristotle'),
    ('analytic', 'Analytic'),
]

wordclouds = {}
for school, _ in cloud_panels:
    text = ' '.join(data[data.school == school].sentence_str.tolist()).lower()
    wordclouds[school] = WordCloud(
        stopwords=STOPWORDS,
        collocations=False,
        min_word_length=3,
        background_color='white',
    ).generate(text)

fig, axes = plt.subplots(5, 3, figsize=(25, 25))
fig.suptitle('Word Clouds for Each School of Thought', size=40, fontweight='bold')
# zip stops after the 13 schools, leaving the last two grid cells empty.
for ax, (school, heading) in zip(axes.flat, cloud_panels):
    ax.imshow(wordclouds[school])
    ax.set_title(heading, size=25, pad=20, fontweight='bold')
# Hide the axes of every cell, including the two unused ones.
for ax in fig.axes:
    ax.axis('off')
plt.show()
# Second pass of the per-school word clouds: "thing"/"things" are added to the
# stop words and only words of at least five characters are kept.
# A single loop replaces thirteen copy-pasted stanzas (which had already
# drifted: the first one omitted collocation_threshold). That argument is not
# passed at all, since it only tunes bigram detection and collocations=False
# disables it.
stop_words = ["thing", "things"] + list(STOPWORDS)

# (school value in the data, panel title) in display order.
cloud_panels = [
    ('stoicism', 'Stoicism'),
    ('nietzsche', 'Nietzsche'),
    ('communism', 'Communism'),
    ('capitalism', 'Capitalism'),
    ('feminism', 'Feminism'),
    ('empiricism', 'Empiricism'),
    ('rationalism', 'Rationalism'),
    ('phenomenology', 'Phenomenology'),
    ('continental', 'Continental'),
    ('plato', 'Plato'),
    ('german_idealism', 'German idealism'),
    ('aristotle', 'Aristotle'),
    ('analytic', 'Analytic'),
]

wordclouds = {}
for school, _ in cloud_panels:
    text = ' '.join(data[data.school == school].sentence_str.tolist()).lower()
    wordclouds[school] = WordCloud(
        stopwords=stop_words,
        collocations=False,
        min_word_length=5,
        background_color='white',
    ).generate(text)

fig, axes = plt.subplots(5, 3, figsize=(25, 25))
fig.suptitle('Word Clouds for Each School of Thought', size=40, fontweight='bold')
# zip stops after the 13 schools, leaving the last two grid cells empty.
for ax, (school, heading) in zip(axes.flat, cloud_panels):
    ax.imshow(wordclouds[school])
    ax.set_title(heading, size=25, pad=20, fontweight='bold')
# Hide the axes of every cell, including the two unused ones.
for ax in fig.axes:
    ax.axis('off')
plt.show()